import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from copy import deepcopy
import plotly.io as pio
pio.renderers.default='notebook'
import warnings
from pandas.core.common import SettingWithCopyWarning
warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)
# load the big dataset into a pandas dataframe
big_df = pd.read_csv("../data/loan.csv", index_col=False, dtype='unicode')
# have a quick glance at the dataframe
big_df.head()
| id | member_id | loan_amnt | funded_amnt | funded_amnt_inv | term | int_rate | installment | grade | sub_grade | ... | num_tl_90g_dpd_24m | num_tl_op_past_12m | pct_tl_nvr_dlq | percent_bc_gt_75 | pub_rec_bankruptcies | tax_liens | tot_hi_cred_lim | total_bal_ex_mort | total_bc_limit | total_il_high_credit_limit | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1077501 | 1296599 | 5000 | 5000 | 4975 | 36 months | 10.65% | 162.87 | B | B2 | ... | NaN | NaN | NaN | NaN | 0 | 0 | NaN | NaN | NaN | NaN |
| 1 | 1077430 | 1314167 | 2500 | 2500 | 2500 | 60 months | 15.27% | 59.83 | C | C4 | ... | NaN | NaN | NaN | NaN | 0 | 0 | NaN | NaN | NaN | NaN |
| 2 | 1077175 | 1313524 | 2400 | 2400 | 2400 | 36 months | 15.96% | 84.33 | C | C5 | ... | NaN | NaN | NaN | NaN | 0 | 0 | NaN | NaN | NaN | NaN |
| 3 | 1076863 | 1277178 | 10000 | 10000 | 10000 | 36 months | 13.49% | 339.31 | C | C1 | ... | NaN | NaN | NaN | NaN | 0 | 0 | NaN | NaN | NaN | NaN |
| 4 | 1075358 | 1311748 | 3000 | 3000 | 3000 | 60 months | 12.69% | 67.79 | B | B5 | ... | NaN | NaN | NaN | NaN | 0 | 0 | NaN | NaN | NaN | NaN |
5 rows × 111 columns
My goal is to help Lending Club team gain insight on what consitutes a 'Good Loans'and what constitutes a 'Bad Loans'. Good Loans are those loans with status of "Current", "Issued" and "Fully Paid" in the loan_status column. Bad Loans are those loans with the status other than "Current", "Issued" and "Fully Paid".
Particularly, I plan to answer the following questions for the Lending Club team:
Hence, the relevant variables for my theme, I believe, are:
id: A unique LC assigned ID for the loan listing.
term: The number of payments on the loan. Values are in months and can be either 36 or 60.
grade: LC assigned loan grade.
purpose: A category provided by the borrower for the loan request.
emp_title: The job title supplied by the Borrower when applying for the loan.
emp_length: Employment length in years. Possible values are between 0 and 10 where 0 means less than one year and 10 means ten or more years.
loan_status: Current status of the loan.
# important variable to consider
imp_var = [
'id',
'term',
'grade',
'emp_title',
'emp_length',
'purpose',
'loan_status'
]
# pandas dataframe consisting of important variables
df = big_df[imp_var]
# have a quick glance at the new dataframe
df.head(10)
| id | term | grade | emp_title | emp_length | purpose | loan_status | |
|---|---|---|---|---|---|---|---|
| 0 | 1077501 | 36 months | B | NaN | 10+ years | credit_card | Fully Paid |
| 1 | 1077430 | 60 months | C | Ryder | < 1 year | car | Charged Off |
| 2 | 1077175 | 36 months | C | NaN | 10+ years | small_business | Fully Paid |
| 3 | 1076863 | 36 months | C | AIR RESOURCES BOARD | 10+ years | other | Fully Paid |
| 4 | 1075358 | 60 months | B | University Medical Group | 1 year | other | Current |
| 5 | 1075269 | 36 months | A | Veolia Transportaton | 3 years | wedding | Fully Paid |
| 6 | 1069639 | 60 months | C | Southern Star Photography | 8 years | debt_consolidation | Fully Paid |
| 7 | 1072053 | 36 months | E | MKC Accounting | 9 years | car | Fully Paid |
| 8 | 1071795 | 60 months | F | NaN | 4 years | small_business | Charged Off |
| 9 | 1071570 | 60 months | B | Starbucks | < 1 year | other | Charged Off |
Check if any of these columns have missing value.
def check_missing_value(arr):
for var in arr:
if df[var].isnull().values.any() == True:
print(f'The column {var} has missing values.')
print('\n')
check_missing_value(imp_var)
The column emp_title has missing values. The column emp_length has missing values.
Cleaning emp_length column by removing all the strings and converting employment length to integer. Missing values are filled with -1 value. The rows with the value '< 1 year' is converted to 0 and '10+ years' is converted to 10. This step is mainly done to make it easier to sort the column by years for visualization purpose. After they are sorted, their original name is restored. Finally, renaming it as emp_length_years to make it easier to identify the unit for this column.
# helper function to build df
def build_df(var_name):
# group the data based on var_name and store it as a dictionary
data = dict(df[var_name].value_counts())
# convert dictionary to pandas dataframe
data = pd.DataFrame(data.items(),columns = [var_name,'count'])
return data
# rename emp_length to emp_length_years
df.rename(columns = {'emp_length':'emp_length_years'}, inplace = True)
# remove years from row values and make '< 1 year' as 0 and
# '10+ years' as 10.
df.replace({'emp_length_years':
{'< 1 year':0,
'1 year':1,
'2 years':2,
'3 years':3,
'4 years':4,
'5 years':5,
'6 years':6,
'7 years':7,
'8 years':8,
'9 years':9,
'10+ years':10
}
}, inplace=True)
# Handling missing values by replacing it with -1
if df['emp_length_years'].isnull().values.any() == True:
df['emp_length_years'].fillna(-1, inplace=True)
# convert the column dtype from float to int
df = df.copy(deep=True).astype({'emp_length_years': 'int64'})
# group data by emp_length_years
emp_df = build_df('emp_length_years').sort_values(by=['emp_length_years']).reset_index(drop=True)
# take a glance on the update
print('------------------------\n\
-1 denotes missing values\n \
0 denotes less than 1 year\n \
10 denotes 10 or more years\n \
------------------------')
emp_df.style.hide_index()
------------------------ -1 denotes missing values 0 denotes less than 1 year 10 denotes 10 or more years ------------------------
| emp_length_years | count |
|---|---|
| -1 | 1075 |
| 0 | 4583 |
| 1 | 3240 |
| 2 | 4388 |
| 3 | 4095 |
| 4 | 3436 |
| 5 | 3282 |
| 6 | 2229 |
| 7 | 1773 |
| 8 | 1479 |
| 9 | 1258 |
| 10 | 8879 |
Now, we move on to clean emp_title column. Since this column is derived from an open ended question, it is not really a categorical column. However, we could still derive some insights from this column. The missing values will need to be replaced with something like 'employer not mentioned', and we need to lowercase all the datapoints and remove extra whitespaces.
# lowercase all the data points in the column
df['emp_title'] = df['emp_title'].str.lower()
# remove all extra whitespaces
df['emp_title'] = df['emp_title'].str.strip()
# replace NaN missing values with 'employer not mentioned'
df.fillna('employer not mentioned', inplace=True)
# glance at the value count of the modified column
print("Borrower's top 20 employers (including the count of missing values):\n")
print(df['emp_title'].value_counts()[:21])
print('\n\n')
Borrower's top 20 employers (including the count of missing values): employer not mentioned 2459 us army 212 bank of america 138 at&t 83 walmart 82 wells fargo 71 ibm 69 kaiser permanente 69 verizon wireless 65 ups 63 self 60 usaf 58 usps 58 us air force 57 self employed 57 walgreens 53 state of california 49 us navy 49 lockheed martin 49 home depot 47 us postal service 45 Name: emp_title, dtype: int64
Create loan_type column that categorize each entry either 'Good Loans' or 'Bad Loans'. Loans with status of 'Fully Paid', 'Issued', or 'Current' as given 'Good Loans' value. The rest of the columns are given 'Bad Loans' value.
# loans with status of 'Fully Paid', 'Issued', or 'Current' as given 'Good Loans' value in the loan_type column
df.loc[((df['loan_status'] == 'Fully Paid') | (df['loan_status'] == 'Issued') | (df['loan_status'] == 'Current')), 'loan_type'] = 'Good Loans'
# the rest of the columns are given 'Bad Loans' value.
df['loan_type'] = df['loan_type'].fillna('Bad Loans')
# glance at the dataset
df.head(10)
| id | term | grade | emp_title | emp_length_years | purpose | loan_status | loan_type | |
|---|---|---|---|---|---|---|---|---|
| 0 | 1077501 | 36 months | B | employer not mentioned | 10 | credit_card | Fully Paid | Good Loans |
| 1 | 1077430 | 60 months | C | ryder | 0 | car | Charged Off | Bad Loans |
| 2 | 1077175 | 36 months | C | employer not mentioned | 10 | small_business | Fully Paid | Good Loans |
| 3 | 1076863 | 36 months | C | air resources board | 10 | other | Fully Paid | Good Loans |
| 4 | 1075358 | 60 months | B | university medical group | 1 | other | Current | Good Loans |
| 5 | 1075269 | 36 months | A | veolia transportaton | 3 | wedding | Fully Paid | Good Loans |
| 6 | 1069639 | 60 months | C | southern star photography | 8 | debt_consolidation | Fully Paid | Good Loans |
| 7 | 1072053 | 36 months | E | mkc accounting | 9 | car | Fully Paid | Good Loans |
| 8 | 1071795 | 60 months | F | employer not mentioned | 4 | small_business | Charged Off | Bad Loans |
| 9 | 1071570 | 60 months | B | starbucks | 0 | other | Charged Off | Bad Loans |
Saving the final dataframe as a CSV and Excel file
# saving the dataframe in a CSV format.
df.to_csv('data/lending_club_modified_data.csv', encoding='utf-8', index=False)
#savig the dataframe in an excel format.
with pd.ExcelWriter('data/lending_club_modified_data.xlsx') as writer:
df.to_excel(writer)
# load the dataset into a pandas dataframe
df = pd.read_csv("../data/lending_club_modified_data.csv", index_col=False, dtype='unicode')
# have a quick glance at the dataframe
df.head()
| id | term | grade | emp_title | emp_length_years | purpose | loan_status | loan_type | |
|---|---|---|---|---|---|---|---|---|
| 0 | 1077501 | 36 months | B | employer not mentioned | 10 | credit_card | Fully Paid | Good Loans |
| 1 | 1077430 | 60 months | C | ryder | 0 | car | Charged Off | Bad Loans |
| 2 | 1077175 | 36 months | C | employer not mentioned | 10 | small_business | Fully Paid | Good Loans |
| 3 | 1076863 | 36 months | C | air resources board | 10 | other | Fully Paid | Good Loans |
| 4 | 1075358 | 60 months | B | university medical group | 1 | other | Current | Good Loans |
# function to create dataframe for pie chart
def build_df(var_name):
# group the data based on var_name and store it as a dictionary
data = dict(df[var_name].value_counts())
# convert dictionary to pandas dataframe
data = pd.DataFrame(data.items(),columns = [var_name,'count'])
return data
# function to plot pie chart
def plot_pretty_pie_chart(label, value, title_name):
# create a plotly pie chart
fig = go.Figure(data=[go.Pie(labels=label, values=value)])
# write label and percentage inside/outside the chart
fig.update_traces(textinfo='label+percent')
# pull each pie slices a bit to make it look nicer
fig.update_traces(pull=[0.1, 0.1, 0.1, 0.1])
# assign bright color combinations
fig.update_traces(marker=dict(colors=['#286086', 'maroon', 'blue']))
# adjust the size of the figure
fig.update_layout(width=600, height=550)
# give title to the chart and align it to the center
fig.update_layout(title=title_name,title_x=0.5)
# play around with the font and its size of the body
fig.update_layout(font_family="Arial", font_size = 15)
# play around with the font and its size of the title
fig.update_layout(title = {'font_color':'navy', 'font_size':25})
# display the chart
fig.show()
df = df.copy(deep=True).astype({'emp_length_years': 'int64'})
Distribution of loan status
loan_status_df = build_df('loan_status')
loan_status_df.style.hide_index()
| loan_status | count |
|---|---|
| Fully Paid | 32950 |
| Charged Off | 5627 |
| Current | 1140 |
Pie chart showing distribution of loan status
loan_status_type = loan_status_df['loan_status']
loan_status_count = loan_status_df['count']
plot_pretty_pie_chart(loan_status_type, loan_status_count, 'Loan Status')
loan_type_df = build_df('loan_type')
loan_type_df.style.hide_index()
| loan_type | count |
|---|---|
| Good Loans | 34090 |
| Bad Loans | 5627 |
loan_type = loan_type_df['loan_type']
loan_type_count = loan_type_df['count']
plot_pretty_pie_chart(loan_type, loan_type_count, 'Good Loans and Bad Loans')
def generate_df_by_segment_column(segment_col):
loan_segment_by_col_df = pd.DataFrame(df.groupby([segment_col,'loan_type'])['loan_type'].count())
loan_segment_by_col_df['percent'] = loan_segment_by_col_df.groupby(level=0).apply(lambda x: round(100 * x / float(x.sum())))
loan_segment_by_col_df = loan_segment_by_col_df.rename(columns={'loan_type':'count'})
loan_segment_by_col_df['percent'] = loan_segment_by_col_df['percent'].apply(np.int64)
return loan_segment_by_col_df
def plot_grouped_bar_chart(data, x_value, y_value, legend, title_name):
df = data.reset_index()
fig = px.bar(data_frame=df, x=x_value, y=y_value, color=legend, labels={
x_value: x_value.capitalize(),
y_value: y_value.capitalize()
}, barmode="group", hover_data = {'percent':True,'count':True},color_discrete_sequence=['maroon','#286086'])
fig.update_layout(width=600, height=550)
fig.update_layout()
fig.update_layout(title=title_name,title_x=0.5)
fig.update_layout(legend_title_text='Loan type')
fig.update_layout(font_family="Arial", font_size = 15)
fig.update_layout(title = {'font_color':'navy', 'font_size':25})
fig.show()
Generate the segmented df
loan_segment_by_term_df = generate_df_by_segment_column('term')
loan_segment_by_term_df
| count | percent | ||
|---|---|---|---|
| term | loan_type | ||
| 36 months | Bad Loans | 3227 | 11 |
| Good Loans | 25869 | 89 | |
| 60 months | Bad Loans | 2400 | 23 |
| Good Loans | 8221 | 77 |
plot_grouped_bar_chart(loan_segment_by_term_df, 'term', 'count', 'loan_type','Loan Segment by Term (Total)')
Plot grouped bar chart based on proportion
plot_grouped_bar_chart(loan_segment_by_term_df, 'term', 'percent', 'loan_type','Loan Segment by Term (%)')
Generate the segmented df
loan_segment_by_grade_df = generate_df_by_segment_column('grade')
loan_segment_by_grade_df
| count | percent | ||
|---|---|---|---|
| grade | loan_type | ||
| A | Bad Loans | 602 | 6 |
| Good Loans | 9483 | 94 | |
| B | Bad Loans | 1425 | 12 |
| Good Loans | 10595 | 88 | |
| C | Bad Loans | 1347 | 17 |
| Good Loans | 6751 | 83 | |
| D | Bad Loans | 1118 | 21 |
| Good Loans | 4189 | 79 | |
| E | Bad Loans | 715 | 25 |
| Good Loans | 2127 | 75 | |
| F | Bad Loans | 319 | 30 |
| Good Loans | 730 | 70 | |
| G | Bad Loans | 101 | 32 |
| Good Loans | 215 | 68 |
loan_segment_by_grade_df = generate_df_by_segment_column('grade')
loan_segment_by_grade_df
| count | percent | ||
|---|---|---|---|
| grade | loan_type | ||
| A | Bad Loans | 602 | 6 |
| Good Loans | 9483 | 94 | |
| B | Bad Loans | 1425 | 12 |
| Good Loans | 10595 | 88 | |
| C | Bad Loans | 1347 | 17 |
| Good Loans | 6751 | 83 | |
| D | Bad Loans | 1118 | 21 |
| Good Loans | 4189 | 79 | |
| E | Bad Loans | 715 | 25 |
| Good Loans | 2127 | 75 | |
| F | Bad Loans | 319 | 30 |
| Good Loans | 730 | 70 | |
| G | Bad Loans | 101 | 32 |
| Good Loans | 215 | 68 |
Generate grouped bar chart (total)
plot_grouped_bar_chart(loan_segment_by_grade_df, 'grade', 'count', 'loan_type', 'Loan Segment by Grade (Total)')
Generate grouped bar chart (%)
plot_grouped_bar_chart(loan_segment_by_grade_df, 'grade', 'percent', 'loan_type', 'Loan Segment by Grade (%)')
# helper functions to get good loan df
def get_segmented_good_loan_df(var_name):
data = generate_df_by_segment_column(var_name).reset_index()
data = data[data['loan_type']=='Good Loans']
return data.reset_index(drop=True)
# helper functions to get bad loan df
def get_segmented_bad_loan_df(var_name):
data = generate_df_by_segment_column(var_name).reset_index()
data = data[data['loan_type']=='Bad Loans']
return data
# helper function to plot line chart
def plot_line_chart(data, x_value, y_value, title_name,color):
df = data.reset_index()
fig = px.line(data_frame=df, x=x_value, y=y_value, labels={
x_value: 'Employment Length (Years)',
y_value: 'Percent'
}, hover_data = {'percent':True,'count':True},color_discrete_sequence=[color], orientation='h')
fig.update_layout(width=600, height=550)
fig.update_layout()
fig.update_layout(title=title_name,title_x=0.5)
#fig.update_layout(legend_title_text='Loan type')
fig.update_layout(font_family="Times New Roman", font_size = 15)
fig.update_layout(title = {'font_color':'navy', 'font_size':25})
fig.show()
print('------------------------\n\
-1 denotes missing values\n \
0 denotes less than 1 year\n \
10 denotes 10 or more years\n \
------------------------')
emp_length_good_loan_df = get_segmented_good_loan_df('emp_length_years')
#emp_length_good_loan_df.replace({'emp_length_years': {-1: 'Not mentioned', 0:'< 1', 10: '10+'}}, inplace=True)
emp_length_good_loan_df.style.hide_index()
------------------------ -1 denotes missing values 0 denotes less than 1 year 10 denotes 10 or more years ------------------------
| emp_length_years | loan_type | count | percent |
|---|---|---|---|
| -1 | Good Loans | 847 | 79 |
| 0 | Good Loans | 3944 | 86 |
| 1 | Good Loans | 2784 | 86 |
| 2 | Good Loans | 3821 | 87 |
| 3 | Good Loans | 3540 | 86 |
| 4 | Good Loans | 2974 | 87 |
| 5 | Good Loans | 2824 | 86 |
| 6 | Good Loans | 1922 | 86 |
| 7 | Good Loans | 1510 | 85 |
| 8 | Good Loans | 1276 | 86 |
| 9 | Good Loans | 1100 | 87 |
| 10 | Good Loans | 7548 | 85 |
print('------------------------\n\
-1 denotes missing values\n \
0 denotes less than 1 year\n \
10 denotes 10 or more years\n \
------------------------')
plot_line_chart(emp_length_good_loan_df, 'emp_length_years', 'percent','Good Loan and Employment Duration','#286086')
------------------------ -1 denotes missing values 0 denotes less than 1 year 10 denotes 10 or more years ------------------------
print('------------------------\n\
-1 denotes missing values\n \
0 denotes less than 1 year\n \
10 denotes 10 or more years\n \
------------------------')
emp_length_bad_loan_df = get_segmented_bad_loan_df('emp_length_years')
#emp_length_bad_loan_df.replace({'emp_length_years': {-1: 'Not mentioned', 0:'< 1', 10: '10+'}}, inplace=True)
emp_length_bad_loan_df.style.hide_index()
------------------------ -1 denotes missing values 0 denotes less than 1 year 10 denotes 10 or more years ------------------------
| emp_length_years | loan_type | count | percent |
|---|---|---|---|
| -1 | Bad Loans | 228 | 21 |
| 0 | Bad Loans | 639 | 14 |
| 1 | Bad Loans | 456 | 14 |
| 2 | Bad Loans | 567 | 13 |
| 3 | Bad Loans | 555 | 14 |
| 4 | Bad Loans | 462 | 13 |
| 5 | Bad Loans | 458 | 14 |
| 6 | Bad Loans | 307 | 14 |
| 7 | Bad Loans | 263 | 15 |
| 8 | Bad Loans | 203 | 14 |
| 9 | Bad Loans | 158 | 13 |
| 10 | Bad Loans | 1331 | 15 |
print('------------------------\n\
-1 denotes missing values\n \
0 denotes less than 1 year\n \
10 denotes 10 or more years\n \
------------------------')
plot_line_chart(emp_length_bad_loan_df, 'emp_length_years', 'percent','Bad Loan and Employment Duration','maroon')
------------------------ -1 denotes missing values 0 denotes less than 1 year 10 denotes 10 or more years ------------------------
get_segmented_good_loan_df = dict(df['emp_title'].value_counts())
# convert dictionary to pandas dataframe
data = pd.DataFrame(get_segmented_good_loan_df.items(),columns = ['emp_title','count'])
print('The top employers for LC borrowers:')
emp_title_df = build_df('emp_title').head(15)
emp_title_df.style.hide_index()
The top employers for LC borrowers:
| emp_title | count |
|---|---|
| employer not mentioned | 2459 |
| us army | 212 |
| bank of america | 138 |
| at&t | 83 |
| walmart | 82 |
| wells fargo | 71 |
| ibm | 69 |
| kaiser permanente | 69 |
| verizon wireless | 65 |
| ups | 63 |
| self | 60 |
| usaf | 58 |
| usps | 58 |
| us air force | 57 |
| self employed | 57 |
Generate df
bad_loan_purpose_df = get_segmented_bad_loan_df('purpose').reset_index(drop=True).sort_values(by=['percent'], ascending=False)
bad_loan_purpose_df.style.hide_index()
| purpose | loan_type | count | percent |
|---|---|---|---|
| small_business | Bad Loans | 475 | 26 |
| renewable_energy | Bad Loans | 19 | 18 |
| educational | Bad Loans | 56 | 17 |
| moving | Bad Loans | 92 | 16 |
| other | Bad Loans | 633 | 16 |
| debt_consolidation | Bad Loans | 2767 | 15 |
| house | Bad Loans | 59 | 15 |
| medical | Bad Loans | 106 | 15 |
| vacation | Bad Loans | 53 | 14 |
| home_improvement | Bad Loans | 347 | 12 |
| credit_card | Bad Loans | 542 | 11 |
| car | Bad Loans | 160 | 10 |
| major_purchase | Bad Loans | 222 | 10 |
| wedding | Bad Loans | 96 | 10 |
Generate horizontal bar chart
fig = px.bar(bad_loan_purpose_df, x="percent", y="purpose", labels={
'percent': 'Percent',
'purpose': 'Purpose'}, orientation='h', hover_data = {'count':True})
fig.update_layout(yaxis={'categoryorder':'total ascending'})
fig.update_layout(width=700, height=550)
fig.update_traces(marker_color='maroon')
fig.update_layout(font_family="Arial", font_size = 15)
fig.update_layout(title='Purpose for Bad Loans',title_x=0.5)
fig.update_layout(title = {'font_color':'Navy', 'font_size':25})
fig.show()
bad_loan_purpose_df = get_segmented_bad_loan_df('purpose').reset_index(drop=True).sort_values(by=['count'], ascending=False)
bad_loan_purpose_df.style.hide_index()
| purpose | loan_type | count | percent |
|---|---|---|---|
| debt_consolidation | Bad Loans | 2767 | 15 |
| other | Bad Loans | 633 | 16 |
| credit_card | Bad Loans | 542 | 11 |
| small_business | Bad Loans | 475 | 26 |
| home_improvement | Bad Loans | 347 | 12 |
| major_purchase | Bad Loans | 222 | 10 |
| car | Bad Loans | 160 | 10 |
| medical | Bad Loans | 106 | 15 |
| wedding | Bad Loans | 96 | 10 |
| moving | Bad Loans | 92 | 16 |
| house | Bad Loans | 59 | 15 |
| educational | Bad Loans | 56 | 17 |
| vacation | Bad Loans | 53 | 14 |
| renewable_energy | Bad Loans | 19 | 18 |
fig = px.bar(bad_loan_purpose_df, x="count", y="purpose", labels={
'count': 'Count',
'purpose': 'Purpose'}, orientation='h', hover_data = {'count':True})
fig.update_layout(yaxis={'categoryorder':'total ascending'})
fig.update_layout(width=700, height=550)
fig.update_traces(marker_color='maroon')
fig.update_layout(font_family="Arial", font_size = 15)
fig.update_layout(title='Purpose for Bad Loans',title_x=0.5)
fig.update_layout(title = {'font_color':'Navy', 'font_size':25})
fig.show()